######################################
# Media Measurement Matters          #
# Replication Code                   #
# Portal Sites (App. N & O)          #
######################################

# This file contains code for replicating the figures and analyses in
# Appendices N and O. These sections present results including portal sites
# (Appendix N) and excluding both portal sites and Yahoo! News, another common
# aggregator site (Appendix O).

# Set-Up ----

# If desired, set up path into which to save plots and tables.
# `plot_path` is passed as `path =` to every ggsave() call below; when it is
# left as NULL, ggsave() resolves each file name relative to the current
# working directory. `table_path` is not referenced by any call in this script
# as shown.
plot_path <- NULL
table_path <- NULL

# Load packages
library(tidyverse)
library(ggridges)
library(estimatr)
library(overlap)
library(gtools)

# Set up helper operations
# `%notin%` is the element-wise negation of `%in%`: TRUE where the left-hand
# value does not appear in the right-hand set.
`%notin%` <- Negate(`%in%`)

# Set up colors (hex codes). In this script `red_mit` and `blue_mit` colour the
# treatment comparisons, `grey_dark` draws the dashed zero line, and `black`
# (a very dark grey) is the base text colour of the plotting theme.
red_mit <- "#A31F34"
red_light <- "#A9606C"
blue_mit <- "#315485"
grey_light <- "#C2C0BF"
grey_dark <- "#8A8B8C"
black <- "#353132"

# Source helper functions. The path is relative, so the script must be run
# with the working directory set to the folder containing helper_functions.R.
# NOTE(review): presumably this file defines the helpers called below that no
# loaded package appears to provide (visit_plots(), score_plots(),
# score_overlap(), visit_overlap(), gen_ranges(), group_vsent_plot(),
# vsent_plot(), plot_labels()) -- confirm against helper_functions.R.
source("helper_functions.R")

# > Read in data ----

# Survey data (stored as `srvy`; later sections add binned score columns to it)
srvy <- read_rds("data/survey_data_cleaned.rds")

# Web-use data; `domain_recode` identifies the site of each record
web <- read_rds("data/web_use.rds")

# Second copy of the web data with the portal domains (MSN, AOL, Google) and
# the two Yahoo! news domains dropped; used for the Appendix O analyses
web_yahoo <- filter(
  web,
  !(domain_recode %in% c(
    "msn.com", "aol.com", "google.com",
    "news.yahoo.com", "gma.yahoo.com"
  ))
)

# Descriptive results ----

# > Portal sites ----

# Figure N1: distribution of news visits by stated media preferences, with
# portal sites kept in the web data (`df = web`)
n1 <- visit_plots(
  df = web,
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_label = "Relative Slant of News Visits (With Portals)",
  y_label = "Stated Media Preference",
  weights = FALSE
)

# Write the figure to disk (working directory when `plot_path` is NULL)
ggsave(
  filename = "fig_n1.pdf", plot = n1, path = plot_path,
  width = 8, height = 3.5, dpi = 600
)

# Figure N2: distribution of relative slant scores (`x_var = "score"`, the
# version that includes portal sites) by party ID (a), ideology (b), and
# stated media preferences (c)

# N2(a): by partisanship
n2_a <- score_plots(
  var = "pid",
  var_labels = c("Democrats", "Independents", "Republicans"),
  var_levels = c(-1, 0, 1),
  x_var = "score",
  x_label = "Respondent Avg. Slant Score\n(With Portals)",
  y_label = "Party ID"
)
ggsave(
  filename = "fig_n2_a.pdf", plot = n2_a, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# N2(b): by ideology
n2_b <- score_plots(
  var = "ideo",
  var_labels = c("Liberals", "Moderates", "Conservatives"),
  var_levels = c(-1, 0, 1),
  x_var = "score",
  x_label = "Respondent Avg. Slant Score\n(With Portals)",
  y_label = "Ideology"
)
ggsave(
  filename = "fig_n2_b.pdf", plot = n2_b, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# N2(c): by stated media preferences
n2_c <- score_plots(
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_var = "score",
  x_label = "Respondent Avg. Slant Score\n(With Portals)",
  y_label = "Stated Media Preference"
)
ggsave(
  filename = "fig_n2_c.pdf", plot = n2_c, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# Overlapping coefficient: respondent scores (portal-inclusive `score`),
# compared across the three stated-media-preference groups
overlap_port_medpref <- score_overlap(
  var = "score",
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)

# Show the MSNBC-vs-Fox contrast
overlap_port_medpref[["MSNBC vs. Fox"]]

# Overlapping coefficient: news visits, computed on the full web data
visit_overlap_port_medpref <- visit_overlap(
  df = web,
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)

# Show the MSNBC-vs-Fox contrast
visit_overlap_port_medpref[["MSNBC vs. Fox"]]

# > Without portals and Yahoo! ----

# Figure O1: distribution of news visits by stated media preferences, using
# the web data with portal sites and Yahoo! News removed (`df = web_yahoo`).
# An explicit `exemplars` set of domains is passed here.
o1 <- visit_plots(
  df = web_yahoo,
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_label = "Relative Slant of News Visits (With Portals and Yahoo! Removed)",
  y_label = "Stated Media Preference",
  weights = FALSE,
  exemplars = c(
    "cnn.com", "msnbc.com", "usatoday.com",
    "foxnews.com", "nytimes.com"
  )
)

# Write the figure to disk (working directory when `plot_path` is NULL)
ggsave(
  filename = "fig_o1.pdf", plot = o1, path = plot_path,
  width = 8, height = 3.5, dpi = 600
)

# Figure O2: distribution of relative slant scores (`x_var = "score_yahoo"`,
# the version with portal sites and Yahoo! News removed) by party ID (a),
# ideology (b), and stated media preferences (c)

# O2(a): by partisanship
o2_a <- score_plots(
  var = "pid",
  var_labels = c("Democrats", "Independents", "Republicans"),
  var_levels = c(-1, 0, 1),
  x_var = "score_yahoo",
  x_label = "Respondent Avg. Slant Score\n(With Portals and Yahoo! Removed)",
  y_label = "Party ID"
)
ggsave(
  filename = "fig_o2_a.pdf", plot = o2_a, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# O2(b): by ideology
o2_b <- score_plots(
  var = "ideo",
  var_labels = c("Liberals", "Moderates", "Conservatives"),
  var_levels = c(-1, 0, 1),
  x_var = "score_yahoo",
  x_label = "Respondent Avg. Slant Score\n(With Portals and Yahoo! Removed)",
  y_label = "Ideology"
)
ggsave(
  filename = "fig_o2_b.pdf", plot = o2_b, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# O2(c): by stated media preferences
o2_c <- score_plots(
  var = "med_pref",
  var_labels = c("Prefer\nMSNBC", "Prefer\nEntertainment", "Prefer\nFox"),
  var_levels = c("MSNBC", "Entertainment", "Fox"),
  x_var = "score_yahoo",
  x_label = "Respondent Avg. Slant Score\n(With Portals and Yahoo! Removed)",
  y_label = "Stated Media Preference"
)
ggsave(
  filename = "fig_o2_c.pdf", plot = o2_c, path = plot_path,
  width = 4.5, height = 3, dpi = 600
)

# Overlapping coefficient: respondent scores (`score_yahoo`, i.e. portals and
# Yahoo! News removed), compared across the stated-media-preference groups
overlap_yahoo_medpref <- score_overlap(
  var = "score_yahoo",
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)

# Show the MSNBC-vs-Fox contrast
overlap_yahoo_medpref[["MSNBC vs. Fox"]]

# Overlapping coefficient: news visits, computed on the filtered web data
visit_overlap_yahoo_medpref <- visit_overlap(
  df = web_yahoo,
  group_var = "med_pref",
  values = c("MSNBC", "Entertainment", "Fox"),
  out_contrasts = c(
    "MSNBC vs. Fox",
    "MSNBC vs. Entertainment",
    "Fox vs. Entertainment"
  )
)

# Show the MSNBC-vs-Fox contrast
visit_overlap_yahoo_medpref[["MSNBC vs. Fox"]]

# Experimental results ----

# Set theme for plotting. theme_set() replaces the session-wide default
# ggplot2 theme, so every plot built after this point starts from theme_bw()
# plus the overrides below (bottom legend, bold centred titles, 10pt black
# axis text, boxed legend, dark-grey base text).
# Axis-title margins are built with ggplot2's margin() constructor, the
# documented way to supply `element_text(margin = )`, rather than a bare
# unit() vector; the values (3 mm above the x title, 3 mm right of the
# y title) are unchanged.
theme_set(
  theme_bw() +
    theme(
      legend.position = "bottom",
      plot.title = element_text(hjust = 0.5, face = "bold", size = 16),
      plot.subtitle = element_text(hjust = 0.5, face = "italic", size = 12),
      axis.title.x = element_text(
        margin = margin(t = 3, r = 0, b = 0, l = 0, unit = "mm"),
        face = "bold", size = 12, angle = 0, hjust = 0.5
      ),
      axis.title.y = element_text(
        margin = margin(t = 0, r = 3, b = 0, l = 0, unit = "mm"),
        face = "bold", size = 12
      ),
      legend.title = element_text(face = "bold", hjust = 0.5, size = 12),
      legend.text = element_text(hjust = 0.5, size = 10),
      axis.text.y = element_text(size = 10, color = "black"),
      axis.text.x = element_text(size = 10, color = "black"),
      legend.box = "vertical",
      legend.background = element_blank(),
      legend.box.background = element_rect(colour = "black"),
      # `black` here is the hex colour variable defined in the set-up section,
      # not the string "black"
      text = element_text(colour = black, size = 15)
    )
)

# > Portal sites ----

# Identify alignment scores for two exemplar sites: cnn.com and news.yahoo.com.
# Each domain's score is the mean of `avg_align` over that domain's rows in
# the full web data (portal sites included).
align_scores <- web %>%
  group_by(domain_recode) %>%
  summarise(avg_align = mean(avg_align))

cnn <- align_scores %>% filter(domain_recode == "cnn.com") %>% pull(avg_align)
yahoo <- align_scores %>% filter(domain_recode == "news.yahoo.com") %>% pull(avg_align)

# Fail loudly if either threshold is unusable. case_when() treats an NA
# condition as FALSE, so an NA threshold would silently push every scored
# respondent into bin 3; a missing domain would yield a zero-length threshold.
stopifnot(
  length(cnn) == 1, !is.na(cnn),
  length(yahoo) == 1, !is.na(yahoo)
)

# Classify respondents into groups based on their average alignment score,
# including portal sites
srvy <- srvy %>%
  mutate(
    # Exemplar-site bins: 1 = score below CNN, 2 = from CNN up to (not
    # including) Yahoo!, 3 = at or above Yahoo!; respondents with no score
    # stay NA. The bin labels below assume cnn < yahoo.
    score_code_port = case_when(is.na(score) ~ NA_real_,
                                score < cnn ~ 1,
                                score < yahoo ~ 2, TRUE ~ 3),
    # Terciles of the same score
    score_port_bin3 = ntile(score, 3)
  )

# Generate labels for plotting: a descriptive prefix for each bin followed by
# the text returned by gen_ranges() for that bin
code_port_labels <- gen_ranges(bin_var = "score_code_port", score_version = "score",
                               parentheses = TRUE)
code_port_labels <- paste0(c("More Liberal\nThan CNN\n", "Between CNN\nand Yahoo!\n",
                             "More Conserv.\nThan Yahoo!\n"),
                           code_port_labels)

port_labels3 <- gen_ranges(bin_var = "score_port_bin3", score_version = "score",
                           parentheses = TRUE)
port_labels3 <- paste0(c("Most\nLiberal\n", "Moderate\n", "Most\nConservative\n"),
                       port_labels3)

# Figure N3: relative slant results, disaggregated by revealed preferences
# including portal sites (bins defined by the exemplar sites)
score_vsent_code <- group_vsent_plot(
  var = "score_code_port",
  nbins = 3,
  labels = code_port_labels,
  weights = FALSE
)

# Only the two partisan-vs-entertainment contrasts are drawn: the
# "Fox vs.\nMSNBC" rows are filtered out and rows containing NA are dropped.
# Thick bars span min_cilo90..max_cihi90 and thin bars min_cilo..max_cihi;
# points mark `naive`. Panels are split by `outcome`.
# NOTE(review): the x-axis labels come from the unfiltered table's `bin`
# column and are assumed to line up with the levels of factor(val) -- confirm.
(n3 <- ggplot(
  data = na.omit(filter(score_vsent_code, id != "Fox vs.\nMSNBC")),
  mapping = aes(
    x = factor(val),
    colour = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    )),
    shape = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    ))
  )
) +
  # White line first, then the dashed grey zero line on top of it
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  geom_errorbar(
    aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(
    aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~ outcome, nrow = 1) +
  scale_x_discrete(labels = unique(score_vsent_code$bin)) +
  xlab("Relative Slant of News Consumption (Binned, With Portals)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(size = 10, hjust = 0.5, color = "black")))

ggsave(
  filename = "fig_n3.pdf", plot = n3, path = plot_path,
  width = 9, height = 5.25, dpi = 600
)

# Figure N4: relative slant results, disaggregated by revealed preferences
# including portal sites (bins defined by terciles)
score_vsent_fx <- vsent_plot(
  var = "score",
  nbins = 3,
  labels = port_labels3,
  weights = FALSE
)

# Only the two partisan-vs-entertainment contrasts are drawn: the
# "Fox vs.\nMSNBC" rows are filtered out and rows containing NA are dropped.
# Thick bars span min_cilo90..max_cihi90 and thin bars min_cilo..max_cihi;
# points mark `naive`. Panels are split by `outcome`.
# NOTE(review): the x-axis labels come from the unfiltered table's `bin`
# column and are assumed to line up with the levels of factor(val) -- confirm.
(n4 <- ggplot(
  data = na.omit(filter(score_vsent_fx, id != "Fox vs.\nMSNBC")),
  mapping = aes(
    x = factor(val),
    colour = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    )),
    shape = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    ))
  )
) +
  # White line first, then the dashed grey zero line on top of it
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  geom_errorbar(
    aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(
    aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~ outcome, nrow = 1) +
  scale_x_discrete(labels = unique(score_vsent_fx$bin)) +
  xlab("Relative Slant of News Consumption (Terciles, With Portals)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(size = 10, hjust = 0.5, color = "black")))

ggsave(
  filename = "fig_n4.pdf", plot = n4, path = plot_path,
  width = 9, height = 5.25, dpi = 600
)

# > Without portals and Yahoo! ----

# Classify respondents into groups based on their average alignment score,
# excluding portal sites and Yahoo! News. The cut points `cnn` and `yahoo` are
# the exemplar-site alignment scores computed earlier in this script from the
# full web data; only the respondent score (`score_yahoo`) differs from the
# portal-inclusive classification.
srvy <- srvy %>%
  mutate(
    # Exemplar-site bins: 1 = score below CNN, 2 = from CNN up to (not
    # including) Yahoo!, 3 = at or above Yahoo!; respondents with no score
    # stay NA
    score_code_yahoo = case_when(is.na(score_yahoo) ~ NA_real_,
                                 score_yahoo < cnn ~ 1,
                                 score_yahoo < yahoo ~ 2, TRUE ~ 3),
    # Terciles of the same score
    score_yahoo_bin3 = ntile(score_yahoo, 3)
  )

# Generate labels for plotting: a descriptive prefix for each bin followed by
# the text returned by gen_ranges() for that bin
code_yahoo_labels <- gen_ranges(bin_var = "score_code_yahoo", score_version = "score_yahoo",
                                parentheses = TRUE)
code_yahoo_labels <- paste0(c("More Liberal\nThan CNN\n", "Between CNN\nand Yahoo!\n",
                              "More Conserv.\nThan Yahoo!\n"),
                            code_yahoo_labels)

yahoo_labels3 <- gen_ranges(bin_var = "score_yahoo_bin3", score_version = "score_yahoo",
                            parentheses = TRUE)
yahoo_labels3 <- paste0(c("Most\nLiberal\n", "Moderate\n", "Most\nConservative\n"),
                        yahoo_labels3)

# Figure O3: relative slant results, disaggregated by revealed preferences
# excluding portal sites and Yahoo! News (bins defined by the exemplar sites)
score_vsent_code_yahoo <- group_vsent_plot(
  var = "score_code_yahoo",
  nbins = 3,
  labels = code_yahoo_labels,
  weights = FALSE
)

# Only the two partisan-vs-entertainment contrasts are drawn: the
# "Fox vs.\nMSNBC" rows are filtered out and rows containing NA are dropped.
# Thick bars span min_cilo90..max_cihi90 and thin bars min_cilo..max_cihi;
# points mark `naive`. Panels are split by `outcome`.
# NOTE(review): the x-axis labels come from the unfiltered table's `bin`
# column and are assumed to line up with the levels of factor(val) -- confirm.
(o3 <- ggplot(
  data = na.omit(filter(score_vsent_code_yahoo, id != "Fox vs.\nMSNBC")),
  mapping = aes(
    x = factor(val),
    colour = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    )),
    shape = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    ))
  )
) +
  # White line first, then the dashed grey zero line on top of it
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  geom_errorbar(
    aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(
    aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~ outcome, nrow = 1) +
  scale_x_discrete(labels = unique(score_vsent_code_yahoo$bin)) +
  xlab("Relative Slant of News Consumption (Binned, Portals and Yahoo! Removed)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5, color = "black")))

ggsave(
  filename = "fig_o3.pdf", plot = o3, path = plot_path,
  width = 9, height = 5.25, dpi = 600
)

# Figure O4: relative slant results, disaggregated by revealed preferences
# excluding portal sites and Yahoo! News (bins defined by terciles)
score_yahoo_vsent_fx <- vsent_plot(
  var = "score_yahoo",
  nbins = 3,
  labels = yahoo_labels3,
  weights = FALSE
)

# Only the two partisan-vs-entertainment contrasts are drawn: the
# "Fox vs.\nMSNBC" rows are filtered out and rows containing NA are dropped.
# Thick bars span min_cilo90..max_cihi90 and thin bars min_cilo..max_cihi;
# points mark `naive`. Panels are split by `outcome`.
# NOTE(review): the x-axis labels come from the unfiltered table's `bin`
# column and are assumed to line up with the levels of factor(val) -- confirm.
(o4 <- ggplot(
  data = na.omit(filter(score_yahoo_vsent_fx, id != "Fox vs.\nMSNBC")),
  mapping = aes(
    x = factor(val),
    colour = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    )),
    shape = factor(id, levels = c(
      "Fox vs.\nEntertainment",
      "MSNBC vs.\nEntertainment",
      "Fox vs.\nMSNBC"
    ))
  )
) +
  # White line first, then the dashed grey zero line on top of it
  geom_hline(yintercept = 0, colour = "white") +
  geom_hline(yintercept = 0, linetype = "dashed", colour = grey_dark) +
  geom_errorbar(
    aes(ymin = min_cilo90, ymax = max_cihi90),
    width = 0, lwd = 1, position = position_dodge(width = 0.5)
  ) +
  geom_errorbar(
    aes(ymin = min_cilo, ymax = max_cihi),
    width = 0, position = position_dodge(width = 0.5)
  ) +
  geom_point(
    aes(y = naive),
    position = position_dodge(width = 0.5),
    size = 2
  ) +
  facet_wrap(~ outcome, nrow = 1) +
  scale_x_discrete(labels = unique(score_yahoo_vsent_fx$bin)) +
  xlab("Relative Slant of News Consumption (Terciles, Portals and Yahoo! Removed)") +
  ylab("Average Treatment Effect of\nPartisan Media vs. Entertainment") +
  scale_y_continuous(
    breaks = seq(-0.2, 0.2, 0.1),
    labels = plot_labels()$att,
    limits = c(-0.225, 0.225),
    sec.axis = dup_axis(
      name = "",
      breaks = seq(-0.2, 0.2, 0.1),
      labels = plot_labels()$share
    )
  ) +
  scale_colour_manual("Comparison", values = c(red_mit, blue_mit)) +
  scale_shape_manual("Comparison", values = c(16, 17, 15)) +
  theme(axis.text.x = element_text(size = 10, angle = 0, hjust = 0.5, color = "black")))

ggsave(
  filename = "fig_o4.pdf", plot = o4, path = plot_path,
  width = 9, height = 5.25, dpi = 600
)
